Maps of where the Fortune 500 companies - that is the five hundred largest U.S. corporations by total revenue - have their headquarters and how HQ loactions are related to state tax rate.
Get the addresses of Fortune 500 headquarter addresses from Geography Realm.
fortune <- read.csv("data/fortune1000.csv")
names(fortune) <- c("latitude", "longitude", "company", "rank", "years_on_list", "city", "state")
fortune <- fortune %>% filter(rank <= 500)
Aggregate the number of headquarters by state. Make a map in which the states are shaded by their number of Fortune 500 companies.
library(openintro)
fortune_by_state <- fortune %>%
mutate(state_name = abbr2state(state)) %>%
group_by(state_name, state) %>%
tally()
library(albersusa)
state_fortune <- usa_sf("laea") %>%
left_join(fortune_by_state, c("name"="state_name"))
statesToLabel <- c("NY", "CA", "TX")
ggplot(state_fortune) +
geom_sf(aes(fill = n),
color = "gray95", size = 0.25, alpha = 0.9) +
scale_fill_gradient(low="darksalmon", high="brown4", na.value="gray80") +
geom_sf_text(aes(label = paste0(state,'\n',n)),
data = filter(state_fortune, state %in% statesToLabel),
color = "white", size = 3, lineheight = .75, fontface = "bold") +
labs(title = "NY, CA, & TX: States with the most Fortune 500 company headquarters",
subtitle = "Number of Fortune 500 company headquarters by state in 2018",
caption = "Source: Geography Realm") +
theme_map() +
theme(
legend.position="right",
legend.title=element_blank(),
plot.subtitle=element_text(size=10, color="grey40", face="italic"),
plot.caption=element_text(size=7, color="grey40")
)
library(statebins)
state_fortune <- state_fortune %>%
mutate(cnt_group = case_when(
is.na(n) ~ "0",
n < 10 ~ "< 10",
n >=10 & n <20 ~ "10-20",
n >=20 & n <40 ~ "20-40",
n >=40 ~ "> 40"
))
state_fortune$cnt_group <- factor(state_fortune$cnt_group, levels = c("> 40","20-40","10-20","< 10","0"))
statebins(state_fortune, state_col = "name", value_col = "cnt_group",
#palette = "YlOrBr", direction=1,
ggplot2_scale_function = scale_fill_manual,
values = c("#993404","#d95f0e","#fe9929","#fed98e","#ffffd4"),
round=TRUE,
name = ""
) +
labs(title = "Number of Fortune 500 company headquarters by state in 2018") +
theme_statebins(
legend_position = "right"
)
library(leaflet)
library(geojsonio)
states <- geojsonio::geojson_read("data/gz_2010_us_040_00_500k.json", what = "sp")
states@data <- states@data %>%
left_join(fortune_by_state, by = c("NAME"="state_name")) %>%
replace_na(list(n=0))
bins <- c(0, 1, 10, 20, 40, 60)
pal <- colorBin("YlOrBr", domain = state_fortune$n, bins = bins)
popupcontent <- sprintf("<strong>%s</strong><br/>%s",
states@data$NAME,
ifelse(is.na(states@data$n),"",paste(states@data$n, "headquarters"))
) %>%
lapply(htmltools::HTML)
m <- leaflet(states, options = leafletOptions(minZoom = 3)) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
setView(lng = -99.8421, lat = 39.3647, zoom = 3) %>%
addPolygons(
fillColor = ~pal(n),
stroke = FALSE,
smoothFactor = 0.5,
fillOpacity = 0.7,
label = popupcontent,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "13px",
direction = "auto"
)
) %>%
addPopups(
lng = -73.9855, lat = 42.7580,
paste("NY has", states@data %>% filter(NAME == "New York") %>% select(n) %>% as.numeric(),"Fortune500 headquarters"),
options = popupOptions(closeButton = FALSE)
) %>%
addControl("Number of Fortune 500 Company Headquarters", position = "topright") %>%
addLegend(colors = c("#993404","#d95f0e","#fe9929","#fed98e","#ffffd4"),
labels = c("> 40","20-40","10-20","< 10","none"),
opacity = 0.7, title = NULL, position = "bottomright")
m
#library(htmlwidgets)
#saveWidget(widget = m,
# file = "fortune500_by_state.html",
# selfcontained = TRUE)
Download state corporate income tax rates and extract top tax rate at each state. Merge to sp data.
read_excel("data\\state_corporate_income_tax_rates_2018.xlsx", sheet = "Sheet1") %>%
select(State, Rates) %>%
group_by(State) %>%
slice(which.max(Rates)) %>%
write.csv("data\\state_corporate_income_tax_rates_2018.csv", row.names = F)
library("readxl")
corptax <- read_excel("data\\state_corporate_income_top_tax_rates_2018.xlsx")
states <- geojsonio::geojson_read("data/gz_2010_us_040_00_500k.json", what = "sp")
states@data <- states@data %>%
left_join(corptax, by = c("NAME"="state"))
Thanks to [@mpriem89](https://github.com/rstudio/leaflet/issues/256) for creating the addLegend_decreasing function that enables order legend decreasingly.
pal <- colorNumeric(palette = "YlOrBr", domain = corptax$topcorpinctax)
popupcontent <- sprintf("<strong>%s</strong><br/>%s",
states@data$NAME,
paste0("Top Corporate Income Tax: ",round(states@data$topcorpinctax*100,2),"%")
) %>%
lapply(htmltools::HTML)
# Credit to @mpriem89 (https://github.com/rstudio/leaflet/issues/256) for this neat function
addLegend_decreasing <- function (map, position = c("topright", "bottomright", "bottomleft",
"topleft"), pal, values, na.label = "NA", bins = 7, colors,
opacity = 0.5, labels = NULL, labFormat = labelFormat(),
title = NULL, className = "info legend", layerId = NULL,
group = NULL, data = getMapData(map), decreasing = FALSE) {
position <- match.arg(position)
type <- "unknown"
na.color <- NULL
extra <- NULL
if (!missing(pal)) {
if (!missing(colors))
stop("You must provide either 'pal' or 'colors' (not both)")
if (missing(title) && inherits(values, "formula"))
title <- deparse(values[[2]])
values <- evalFormula(values, data)
type <- attr(pal, "colorType", exact = TRUE)
args <- attr(pal, "colorArgs", exact = TRUE)
na.color <- args$na.color
if (!is.null(na.color) && col2rgb(na.color, alpha = TRUE)[[4]] == 0) {
na.color <- NULL
}
if (type != "numeric" && !missing(bins))
warning("'bins' is ignored because the palette type is not numeric")
if (type == "numeric") {
cuts <- if (length(bins) == 1)
pretty(values, bins)
else bins
if (length(bins) > 2)
if (!all(abs(diff(bins, differences = 2)) <= sqrt(.Machine$double.eps)))
stop("The vector of breaks 'bins' must be equally spaced")
n <- length(cuts)
r <- range(values, na.rm = TRUE)
cuts <- cuts[cuts >= r[1] & cuts <= r[2]]
n <- length(cuts)
p <- (cuts - r[1])/(r[2] - r[1])
extra <- list(p_1 = p[1], p_n = p[n])
p <- c("", paste0(100 * p, "%"), "")
if (decreasing == TRUE){
colors <- pal(rev(c(r[1], cuts, r[2])))
labels <- rev(labFormat(type = "numeric", cuts))
}else{
colors <- pal(c(r[1], cuts, r[2]))
labels <- rev(labFormat(type = "numeric", cuts))
}
colors <- paste(colors, p, sep = " ", collapse = ", ")
}
else if (type == "bin") {
cuts <- args$bins
n <- length(cuts)
mids <- (cuts[-1] + cuts[-n])/2
if (decreasing == TRUE){
colors <- pal(rev(mids))
labels <- rev(labFormat(type = "bin", cuts))
}else{
colors <- pal(mids)
labels <- labFormat(type = "bin", cuts)
}
}
else if (type == "quantile") {
p <- args$probs
n <- length(p)
cuts <- quantile(values, probs = p, na.rm = TRUE)
mids <- quantile(values, probs = (p[-1] + p[-n])/2,
na.rm = TRUE)
if (decreasing == TRUE){
colors <- pal(rev(mids))
labels <- rev(labFormat(type = "quantile", cuts, p))
}else{
colors <- pal(mids)
labels <- labFormat(type = "quantile", cuts, p)
}
}
else if (type == "factor") {
v <- sort(unique(na.omit(values)))
colors <- pal(v)
labels <- labFormat(type = "factor", v)
if (decreasing == TRUE){
colors <- pal(rev(v))
labels <- rev(labFormat(type = "factor", v))
}else{
colors <- pal(v)
labels <- labFormat(type = "factor", v)
}
}
else stop("Palette function not supported")
if (!any(is.na(values)))
na.color <- NULL
}
else {
if (length(colors) != length(labels))
stop("'colors' and 'labels' must be of the same length")
}
legend <- list(colors = I(unname(colors)), labels = I(unname(labels)),
na_color = na.color, na_label = na.label, opacity = opacity,
position = position, type = type, title = title, extra = extra,
layerId = layerId, className = className, group = group)
invokeMethod(map, data, "addLegend", legend)
}
leaflet(states, options = leafletOptions(minZoom = 3)) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
setView(lng = -99.8421, lat = 39.3647, zoom = 3) %>%
addPolygons(
fillColor = ~pal(topcorpinctax),
stroke = FALSE,
smoothFactor = 0.5,
fillOpacity = 0.7,
label = popupcontent,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "13px",
direction = "auto"
)
) %>%
addControl("State Top Corporate Income Tax Rates", position = "topright") %>%
addLegend_decreasing(pal = pal,
values = ~topcorpinctax,
position = "bottomright",
title = "Tax Rate",
opacity = 1,
decreasing = TRUE,
labFormat = labelFormat(
transform = function(x) 100 * x,
suffix = "%"))
tax_hq <- corptax %>%
left_join(fortune_by_state, by = c("state"="state_name")) %>%
select(state, topcorpinctax, n) %>%
replace_na(list(n = 0))
ggplot(tax_hq, aes(x = topcorpinctax, y = n)) +
geom_point() +
#geom_smooth(method="loess" , color="gray50", fill="wheat", se=T, alpha = 0.3) +
geom_text(data = tax_hq %>% filter(state %in% c("New York", "California", "Texas", "Iowa")),
aes(label = state),
hjust = 0, nudge_x = 0.001) +
scale_x_continuous(labels = scales::percent) +
ylim(0, NA) +
labs(title = "No evidence of companies being headquartered in low tax states",
subtitle = "Top corporate income tax rate and number of Fortune 500 headquarters",
caption = "Year: 2018") +
xlab("top corporate income tax rate") +
ylab("number of headquarters") +
theme_hc() +
theme(legend.position="none",
plot.subtitle=element_text(size=10, color="grey40", face="italic"),
plot.caption=element_text(size=7, color="grey40"),
axis.title.x=element_text(size=10, color="grey40"),
axis.title.y=element_text(size=10, color="grey40"))
To account for state population, calculate number of HQs per 1000,000 people.
Population data (estimate, July 2019) on state populations is imported from Wikipedia.
library(rvest)
url <- "https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population"
state_population <- url %>%
html() %>%
html_node("table") %>%
html_table(fill = TRUE)
state_population <- state_population[, 3:4]
state_population <- state_population %>%
slice(2:n()) %>%
select(state = State, population = "Census population") %>%
mutate(population = as.numeric(gsub(",","",population))) %>%
right_join(corptax) %>%
left_join(fortune_by_state, by = c("state"="state_name")) %>%
replace_na(list(n=0)) %>%
select(state, population, topcorpinctax, n) %>%
mutate(hq.per.capita = n/population*1000000) # number of HQs per million people
ggplot(state_population, aes(x = topcorpinctax, y = hq.per.capita)) +
geom_point() +
#geom_smooth(method="loess" , color="gray50", fill="wheat", se=T, alpha = 0.3) +
geom_text(data = state_population %>% filter(state %in% c("New York", "California", "Texas", "Iowa")),
aes(label = state),
hjust = 0, nudge_x = 0.001,
color = "gray60") +
scale_x_continuous(labels = scales::percent) +
ylim(0,NA) +
labs(title = "No evidence of companies being headquartered in low tax states",
subtitle = "Top corporate income tax rate and number of Fortune 500 headquarters per million people",
caption = "Year: 2018") +
xlab("top corporate income tax rate") +
ylab("number of headquarters per million people") +
theme_hc() +
theme(legend.position="none",
plot.subtitle=element_text(size=10, color="grey40", face="italic"),
plot.caption=element_text(size=7, color="grey40"),
axis.title.x=element_text(size=10, color="grey40"),
axis.title.y=element_text(size=10, color="grey40"))
Add locations of all headquarters to the state map shaded by tax rates. Size the points by the ranking on the Fortune 500 list.
states <- geojsonio::geojson_read("data/gz_2010_us_040_00_500k.json", what = "sp")
states@data <- states@data %>%
left_join(corptax, by = c("NAME"="state"))
pal <- colorNumeric(palette = "YlOrBr", domain = corptax$topcorpinctax)
popupPolygons <- sprintf(
"<strong>%s</strong><br/>%s",
states@data$NAME,
paste0("Top Corp Income Tax: ",round(states@data$topcorpinctax*100,2),"%")
) %>%
lapply(htmltools::HTML)
fortune <- fortune %>%
left_join(corptax %>% mutate(state_abb = state2abbr(state)), by = c("state"="state_abb"))
popupCircle <- sprintf(
"<strong>%s</strong><br/>%s<br/>%s<br/>%s",
fortune$company,
paste0("Rank: ",fortune$rank),
paste0(fortune$city,", ",fortune$state),
paste0("Top Corp Income Tax: ",round(fortune$topcorpinctax*100,2),"%")
) %>%
lapply(htmltools::HTML)
leaflet(states, options = leafletOptions(minZoom = 3)) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
setView(lng = -99.8421, lat = 39.3647, zoom = 3) %>%
addPolygons(
fillColor = ~pal(topcorpinctax),
stroke = FALSE,
smoothFactor = 0.5,
fillOpacity = 0.7,
label = popupPolygons
) %>%
addCircleMarkers(
lng = fortune$longitude, lat = fortune$latitude,
radius = 20/fortune$rank,
color = "#00A4CC",
stroke = FALSE, fillOpacity = 0.5,
label = popupCircle
) %>%
addControl("Fortune 500 HQs and State Top Corporate Income Tax Rates", position = "topright") %>%
addLegend_decreasing(pal = pal,
values = ~topcorpinctax,
position = "bottomright",
title = "Tax Rate",
opacity = 1,
decreasing = TRUE,
labFormat = labelFormat(
transform = function(x) 100 * x,
suffix = "%"))